Bayesian differential cell type proportion analysis: Bayesian multinomial logistic regression model initialization and example application to a generated toy dataset.
Install the model_multinom.R function by cloning the github repository and load the function in R.
source('./model_multinom.R')
In this example, we generate a toy dataset and apply the Bayesian multinomial logistic regression model to the distributions of 10 randomly generated samples (labelled 5 younger and 5 older) with max 10000 counts with 3 cell types with set proportions that add up to 1.
library(tidyverse)
## Warning: replacing previous import 'vctrs::data_frame' by 'tibble::data_frame'
## when loading 'dplyr'
library(rjags)
library(coda)
library(hablar)
library(patchwork)
set.seed(562)
# 5 younger samples with set probabilities (0.1, 0.2, 0.7) of 3 categories (i.e. cell types)
sim.data.Y <- t(rmultinom(5, size = 10000, prob = c(0.1,0.2,0.7)))
# 5 older samples with set probabilities (0.2, 0.3, 0.5) of 3 categories (i.e. cell types)
sim.data.O <- t(rmultinom(5, size = 10000, prob = c(0.2,0.3,0.5)))
#bind sample count together
sim.data <- rbind(sim.data.Y, sim.data.O)
#create an age variable with 3 younger samples and 3 older samples
age.group <- c(rep(1,5), rep(2,5))
#pull simulated data and age variable in jags data object
jags.data <- list(y = sim.data, #simulated counts
age.group = factor(age.group), #age group label (younger = 1, older = 2)
N.sample = nrow(sim.data), #number of samples
N.ct = ncol(sim.data), #number of cell types
N.total = rep(10000,10), #total number of counts per sample
N.age = 2, #number of age groups
sex = sample(1:2, 10, replace = TRUE) #sex label
)
jags.data
## $y
## [,1] [,2] [,3]
## [1,] 1019 1993 6988
## [2,] 973 2031 6996
## [3,] 964 1994 7042
## [4,] 1043 1977 6980
## [5,] 1009 1991 7000
## [6,] 1941 3008 5051
## [7,] 2066 2909 5025
## [8,] 2061 2906 5033
## [9,] 2005 2902 5093
## [10,] 2052 2929 5019
##
## $age.group
## [1] 1 1 1 1 1 2 2 2 2 2
## Levels: 1 2
##
## $N.sample
## [1] 10
##
## $N.ct
## [1] 3
##
## $N.total
## [1] 10000 10000 10000 10000 10000 10000 10000 10000 10000 10000
##
## $N.age
## [1] 2
##
## $sex
## [1] 1 1 1 1 1 2 2 1 1 2
#Run model with 500 burn-in iterations and 1000 total iterations
jags <- jags.model(textConnection(multinom.model),data=jags.data, n.adapt=500, inits = list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 10))
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 10
## Unobserved stochastic nodes: 9
## Total graph size: 132
##
## Initializing model
#monitor predicted probabilities p
test <- coda.samples(jags, c('p'), n.adapt = 500, n.iter=1000)
#Get estimates of p[i,j] for sample i and category j
coda.summary <- summary(test)
predicted.prob <- coda.summary$statistics %>% as_tibble() %>% select(Mean)
predicted.prob <- matrix(as.vector(predicted.prob$Mean), nrow = 10, ncol = 3)
predicted.prob
## [,1] [,2] [,3]
## [1,] 0.1001902 0.1995911 0.7002187
## [2,] 0.1001902 0.1995911 0.7002187
## [3,] 0.1001902 0.1995911 0.7002187
## [4,] 0.1001902 0.1995911 0.7002187
## [5,] 0.1001902 0.1995911 0.7002187
## [6,] 0.2019915 0.2949450 0.5030634
## [7,] 0.2019915 0.2949450 0.5030634
## [8,] 0.2032738 0.2903492 0.5063770
## [9,] 0.2032738 0.2903492 0.5063770
## [10,] 0.2019915 0.2949450 0.5030634
#predicted probabilities for each sample should add up to 1
apply(predicted.prob,1,sum)
## [1] 1 1 1 1 1 1 1 1 1 1
plot(test)
autocorr.plot(test)
geweke.diag(test)
## [[1]]
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## p[1,1] p[2,1] p[3,1] p[4,1] p[5,1] p[6,1] p[7,1] p[8,1] p[9,1] p[10,1]
## -0.6727 -0.6727 -0.6727 -0.6727 -0.6727 -1.5665 -1.5665 1.6528 1.6528 -1.5665
## p[1,2] p[2,2] p[3,2] p[4,2] p[5,2] p[6,2] p[7,2] p[8,2] p[9,2] p[10,2]
## 0.6605 0.6605 0.6605 0.6605 0.6605 0.8841 0.8841 -0.7203 -0.7203 0.8841
## p[1,3] p[2,3] p[3,3] p[4,3] p[5,3] p[6,3] p[7,3] p[8,3] p[9,3] p[10,3]
## 0.1190 0.1190 0.1190 0.1190 0.1190 0.8540 0.8540 -0.1809 -0.1809 0.8540